version 2023.11.27_11.25.57 adds per-patient analysis

# Replace every underscore in `x` with a single space.
underscore_to_space <- function(x) {
  str_replace_all(x, pattern = "_", replacement = " ")
}
# Replace underscores in `x` with spaces, then pass the result through
# str_to_sentence() for sentence-style capitalisation.
underscore_to_space_initial_cap <- function(x) {
  spaced <- str_replace_all(x, "_", " ")
  str_to_sentence(spaced)
}
# Load the outlier table and add `high_level_cohort`: any comparison cohort
# whose name contains "Treehouse" is collapsed to the single label
# "Treehouse"; every other cohort keeps its own name.
outliers <- mutate(
  read_tsv("../input_data/druggable_outliers_from_treehouse_and_other_cohorts_2023_11_09-13_46_32_2023.tsv"),
  high_level_cohort = ifelse(
    str_detect(comparison_cohort, "Treehouse"),
    "Treehouse",
    comparison_cohort
  )
)
## Rows: 287 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): Sample_ID, comparison_cohort, gene, donor_ID
## lgl (1): pathway_support
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Number of distinct samples in the outlier table.
length(unique(outliers$Sample_ID))
## [1] 34
# Number of distinct donors in the outlier table.
length(unique(outliers$donor_ID))
## [1] 32

Define cohort codes

# Lookup table pairing each comparison-cohort name with a one-letter code.
cohort_codes <- tribble(
  ~cohort_name,   ~cohort_code,
  "PEDAYA",       "P",
  "TCGA",         "T",
  "TH03_TH34",    "S",
  "Treehouse_pc", "C",
  "Treehouse_pd", "D"
)

Tile plot of all outliers

# Tile plot of every outlier row: one facet column per sample, genes on the
# y axis, and one tile per comparison cohort (also encoded by fill colour).
ggplot(outliers, aes(x = comparison_cohort, y = gene)) +
  geom_tile(aes(fill = comparison_cohort)) +
  facet_wrap(~Sample_ID, nrow = 1) +
  scale_fill_bright() +
  labs(x = "") +
  theme(
    # Alternative: axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
    # x tick labels are hidden; the fill legend identifies the cohort instead.
    axis.text.x = element_blank(),
    strip.text.x = element_text(angle = 90)
  )

Heatmap shows the number of cohorts in which each outlier was detected

I can make this look better if we decide to use it, but it’s non-trivial

# Count the outlier rows for each sample/gene pair (presumably one row per
# comparison cohort in which the outlier was called -- verify against the
# input table). count() returns an ungrouped tibble, so no leftover grouping
# by Sample_ID leaks into later steps and dplyr emits no "grouped output"
# message.
outliers_heatmap_data <- outliers %>%
  count(Sample_ID, gene, name = "n_outliers")
# Heatmap of outlier counts: samples on x, genes on y, tile fill mapped to
# n_outliers, with black tile borders and rotated sample labels.
ggplot(
  outliers_heatmap_data,
  aes(x = Sample_ID, y = gene, fill = n_outliers)
) +
  geom_tile(color = "black") +
  # theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

library(ggVennDiagram)

# Swap internal cohort identifiers for human-readable display names.
# Each identifier is substituted with str_replace(), so only its first
# occurrence within each string is rewritten; all other text is untouched.
update_strings <- function(x) {
  display_names <- c(
    "PEDAYA" = "Pediatric",
    "TH03_TH34" = "Stanford",
    "Treehouse_pc" = "Treehouse pan-cancer",
    "Treehouse_pd" = "Treehouse pan-disease"
  )
  # Apply the substitutions one after another, in the order listed above.
  for (cohort_id in names(display_names)) {
    x <- str_replace(x, cohort_id, display_names[[cohort_id]])
  }
  x
}

# Build one tibble per comparison cohort. Each row identifies an outlier by a
# "<Sample_ID>_<gene>" key; cohort names are converted to display names before
# the table is split.
raw_outliers_for_venn <- outliers %>%
  mutate(
    comparison_cohort = update_strings(comparison_cohort),
    sample_gene = paste0(Sample_ID, "_", gene)
  ) %>%
  select(sample_gene, comparison_cohort) %>%
  arrange(comparison_cohort) %>%
  group_split(comparison_cohort)
# Peek at the first cohort's table.
raw_outliers_for_venn[[1]]
## # A tibble: 26 × 2
##    sample_gene         comparison_cohort
##    <chr>               <chr>            
##  1 TH34_1162_S01_HMOX1 Pediatric        
##  2 TH34_1149_S02_IGF1  Pediatric        
##  3 TH34_1238_S01_MS4A1 Pediatric        
##  4 TH34_1349_S01_KIT   Pediatric        
##  5 TH34_1349_S02_KIT   Pediatric        
##  6 TH34_1379_S01_FGFR4 Pediatric        
##  7 TH34_1380_S01_FGFR4 Pediatric        
##  8 TH34_1150_S02_JAK1  Pediatric        
##  9 TH34_1150_S02_PDCD1 Pediatric        
## 10 TH34_1399_S01_IGF1  Pediatric        
## # ℹ 16 more rows
# Reduce each per-cohort tibble to its vector of sample_gene keys, and label
# each vector with the cohort name stored in that same tibble. Taking the name
# from the tibble itself keeps labels and contents paired no matter how
# group_split() and sort() happen to order the cohorts (their collation rules
# can differ by locale and dplyr version).
list_of_outliers_for_venn <- lapply(
  raw_outliers_for_venn,
  function(cohort_tbl) pull(cohort_tbl, sample_gene)
)
names(list_of_outliers_for_venn) <- vapply(
  raw_outliers_for_venn,
  function(cohort_tbl) cohort_tbl$comparison_cohort[[1]],
  character(1)
)

Best venn

# Preferred Venn diagram: regions are labelled with counts and shaded with the
# "Greens" palette, the x axis is expanded by a 0.2 multiplier, and the legend
# is hidden.
# Optional, currently disabled argument: label_geom = "text"
ggVennDiagram(list_of_outliers_for_venn, label = "count") +
  scale_fill_distiller(palette = "Greens", direction = 1) +
  scale_x_continuous(expand = expansion(mult = 0.2)) +
  theme(legend.position = "none")

Other Venn diagram variants

# Venn diagram variant drawn with show_intersect = TRUE.
ggVennDiagram(list_of_outliers_for_venn, show_intersect = TRUE)
## Warning in geom_text(aes_string(label = "count", text = "text"), x =
## label_coord[, : Ignoring unknown aesthetics: text
# Linearly rescale `x` onto the interval [0, 0.8] using scales::rescale().
fill_rescale <- function(x) {
  scales::rescale(x, to = c(0, 0.8))
}

# Demonstrate the rescaling on the integers 1 through 10.
fill_rescale(seq_len(10))
##  [1] 0.00000000 0.08888889 0.17777778 0.26666667 0.35555556 0.44444444
##  [7] 0.53333333 0.62222222 0.71111111 0.80000000
# Count-labelled Venn diagram with the "Greens" palette.
# Optional, currently disabled argument: label_geom = "text"
ggVennDiagram(list_of_outliers_for_venn, label = "count") +
  scale_fill_distiller(palette = "Greens", direction = 1)

# Count-labelled Venn diagram with the "Spectral" palette.
ggVennDiagram(list_of_outliers_for_venn, label = "count") +
  scale_fill_distiller(palette = "Spectral", direction = 1)

# Venn diagram with the package's default labels and the "Reds" palette.
ggVennDiagram(list_of_outliers_for_venn) +
  scale_fill_distiller(palette = "Reds", direction = 1)

# Record the R version and package versions used for this run.
utils::sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggVennDiagram_1.2.2 cowplot_1.1.1       gridExtra_2.3      
##  [4] kableExtra_1.3.4    khroma_1.10.0       janitor_2.1.0      
##  [7] forcats_0.5.2       stringr_1.5.0       dplyr_1.0.10       
## [10] purrr_0.3.5         readr_2.1.3         tidyr_1.2.1        
## [13] tibble_3.2.1        ggplot2_3.4.4       tidyverse_1.3.2    
## 
## loaded via a namespace (and not attached):
##  [1] fs_1.6.3            sf_1.0-9            lubridate_1.9.0    
##  [4] bit64_4.0.5         webshot_0.5.4       RColorBrewer_1.1-3 
##  [7] httr_1.4.4          tools_4.2.1         backports_1.4.1    
## [10] bslib_0.5.0         utf8_1.2.3          R6_2.5.1           
## [13] KernSmooth_2.23-20  lazyeval_0.2.2      DBI_1.1.3          
## [16] colorspace_2.1-0    withr_2.5.0         tidyselect_1.2.0   
## [19] bit_4.0.5           compiler_4.2.1      cli_3.6.1          
## [22] rvest_1.0.3         xml2_1.3.3          plotly_4.10.1      
## [25] labeling_0.4.2      sass_0.4.7          scales_1.2.1       
## [28] classInt_0.4-9      proxy_0.4-27        systemfonts_1.0.4  
## [31] digest_0.6.33       yulab.utils_0.0.6   rmarkdown_2.23     
## [34] svglite_2.1.0       pkgconfig_2.0.3     htmltools_0.5.5    
## [37] dbplyr_2.2.1        fastmap_1.1.1       highr_0.10         
## [40] htmlwidgets_1.6.2   rlang_1.1.1         readxl_1.4.1       
## [43] rstudioapi_0.14     jquerylib_0.1.4     farver_2.1.1       
## [46] generics_0.1.3      jsonlite_1.8.7      crosstalk_1.2.0    
## [49] vroom_1.6.0         googlesheets4_1.0.1 magrittr_2.0.3     
## [52] Rcpp_1.0.11         munsell_0.5.0       fansi_1.0.4        
## [55] lifecycle_1.0.3     stringi_1.7.12      yaml_2.3.7         
## [58] snakecase_0.11.0    grid_4.2.1          parallel_4.2.1     
## [61] crayon_1.5.2        haven_2.5.1         hms_1.1.2          
## [64] knitr_1.43          pillar_1.9.0        reprex_2.0.2       
## [67] glue_1.6.2          evaluate_0.21       data.table_1.14.6  
## [70] modelr_0.1.10       vctrs_0.6.3         tzdb_0.3.0         
## [73] cellranger_1.1.0    gtable_0.3.3        assertthat_0.2.1   
## [76] cachem_1.0.8        xfun_0.39           broom_1.0.1        
## [79] e1071_1.7-13        class_7.3-20        googledrive_2.0.0  
## [82] RVenn_1.1.0         viridisLite_0.4.2   gargle_1.2.1       
## [85] units_0.8-1         timechange_0.1.1    ellipsis_0.3.2